import requests
from bs4 import BeautifulSoup
import os

# Desktop-site root for bj.58.com (NOTE(review): unused in this file — possibly kept for other callers; confirm before removing).
base_url = 'http://bj.58.com/'
# Mobile-site root; channel hrefs scraped from the desktop page are joined onto this prefix.
base_url_mobile = 'http://m.58.com/bj'

def parse_channel_list():
    """Scrape the second-hand-goods channel links from bj.58.com/sale.shtml,
    rebase each href onto the mobile-site prefix, and append the resulting
    URLs (newline-separated) to the local cache file via write_to_file().
    """
    url = 'http://bj.58.com/sale.shtml'
    response = requests.get(url)
    # Fail loudly on HTTP errors instead of parsing an error page as HTML.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # Channel anchors live under the submenu: ul.ym-submnu > li > b > a
    items = soup.select('ul.ym-submnu > li > b > a')

    # Skip anchors without an href to avoid concatenating None.
    channel_urls = [
        base_url_mobile + item.get('href')
        for item in items
        if item.get('href')
    ]

    content = '\n'.join(channel_urls)

    write_to_file(content)

def write_to_file(content):
    """Append *content* plus a trailing newline to <cwd>/doc/58samecity.txt.

    Creates the doc/ directory on first use. The file is opened in append
    mode, so repeated calls accumulate lines.
    """
    path = os.path.join(os.getcwd(), 'doc')
    if not os.path.exists(path):
        os.makedirs(path)
    file_path = os.path.join(path, '58samecity.txt')
    with open(file_path, 'a', encoding='utf-8') as f:
        f.write(content + '\n')
        print('写入成功', content)
        # NOTE: no explicit f.close() — the with-statement closes the file;
        # the original redundant close() inside the block was removed.

def get_channel_list():
    """Read the cached channel URLs from <cwd>/doc/58samecity.txt and return
    them as a list with duplicates removed, preserving first-seen order.

    Raises FileNotFoundError if parse_channel_list() has never populated
    the cache file.
    """
    path = os.path.join(os.getcwd(), 'doc')
    file_path = os.path.join(path, '58samecity.txt')
    # Match the writer's encoding explicitly so the default locale encoding
    # (e.g. on Windows) cannot corrupt the round trip.
    with open(file_path, 'r', encoding='utf-8') as f:
        urls = [line.strip() for line in f]

    # De-duplicate while keeping insertion order. dict.fromkeys is O(n),
    # unlike the original sorted(set(urls), key=urls.index) which was O(n^2).
    unique_urls = list(dict.fromkeys(urls))
    print(unique_urls)

    return unique_urls

if __name__ == '__main__':
    # One-time step: scrape the channel list from the live site and cache it.
    # parse_channel_list()
    # Normal run: read the cached channel URLs back from doc/58samecity.txt.
    get_channel_list()